** Data reading and variable selection from raw data
** Japanese General Social Survey 2012

** 01. Reading data **

* Close any log left open by a previous run (errors suppressed),
* empty memory, and switch off output paging.
capture log close
clear all
set more off

* Fill in the two placeholders below before running the script.
cd /*insert your work directory here*/
use /*read your data here*/


** 02. Constructing year and country variables **

* The survey year is a constant for every record.
generate year = 2012
label variable year "survey year"

* Japan: 392 (see "ISO Country Codes.pdf")
generate country = 392
label variable country "ISO country code"


** 03. ID variables **

* Person identifier, copied unchanged from the raw variable IDUSE.
generate pid = IDUSE
label variable pid "person id"


** 04. Basic Demographics (Sex and Age/birth year) **

* Sex, copied from SEXA and labelled 1 = male, 2 = female.
generate sex = SEXA
label define sex 1 "male" 2 "female"
label values sex sex
label variable sex "sex"

* Age, copied from AGEB.
generate age = AGEB
label variable age "age"

* Birth year is derived as survey year minus age.
generate birthyr = year - age
label variable birthyr "year of birth"


** 05. Siblings **

* Brother and sister counts are each the sum of two raw counts
* (presumably E = elder and Y = younger -- confirm against the codebook).
generate nbro = XNUMBROE + XNUMBROY
generate nsis = XNUMSISE + XNUMSISY

* Total siblings = brothers + sisters.
generate nsibs = nbro + nsis

* Birth order = XNUMBROE + XNUMSISE + 1.
generate birthorder = XNUMBROE + XNUMSISE + 1

label variable nbro       "number of brothers"
label variable nsis       "number of sisters"
label variable nsibs      "number of siblings"
label variable birthorder "birth order"

* Value labels for the out-of-range codes that arise when raw
* missing codes are carried through the sums above.
label define nbro 1998 "missing"
label values nbro nbro

label define nsis 1000 "dont know" 1001 "not applicable" 1998 "no answer"
label values nsis nsis

label define nsibs 1000 "dont know" 1001 "not applicable" 3996 "no answer"
label values nsibs nsibs

label define birthorder 1000 "dont know" 1999 "no answer"
label values birthorder birthorder


** 06. Own education **

* educ_a: last school attended, copied from XXLSTSCH.
ge educ_a=XXLSTSCH 
lab var educ_a "last school attended"

lab def educ_a 1 "ordinary elementary_old system" 2 "higher elementary_old system" 3 "junior high/girls high_old system" ///
4 "vocational_old system" 5 "normal_old system" 6 "higher or vocational_old system/higher normal" 7 "university/graduate_old system" ///
8 "junior high" 9 "high" 10 "College of technology" 11 "2-year college" 12 "university" 13 "graduate school" 14 "don't know" 99 "no answer"

lab val educ_a educ_a

* educ_c: whether that last school was completed, copied from DOLSTSCH.
ge educ_c=DOLSTSCH
la var educ_c "completion of the last school"
lab def educ_c 1 "graduated" 2 "quit" 3 "still a student" 9 "no answer"
lab val educ_c educ_c

* educ: highest level attained, built from educ_a and educ_c.
*   - graduated (1) or still a student (3): the level attended is used as is.
*     NOTE(review): current students are credited with the level they are
*     attending -- confirm this is intended.
*   - quit (2): step down one category from the level attended.
ge educ=educ_a if educ_c==1 | educ_c==3
replace educ=educ_a-1 if educ_c==2 & educ_a<14
* Fix: category 8 (new-system junior high) minus one is 7, which is
* "university/graduate_old system" -- a higher level than the school that
* was left. Someone who quit junior high has completed six years of
* elementary school; the only category with that meaning (6 years,
* primary level in the later harmonisation) is 1, so use it instead.
replace educ=1 if educ_c==2 & educ_a==8
* NOTE(review): the step-down rule also turns a quit 2-year college (11)
* into "College of technology" (10) and a quit university (12) into
* "2-year college" (11); left unchanged here -- confirm whether those
* should fall back to "high" (9).
* "don't know"/"no answer" on the level attended collapse to 14 for
* quitters. In Stata a missing value compares greater than any number,
* so a missing educ_a also lands here.
replace educ=14 if educ_c==2 & educ_a>=14
* No answer on completion overrides everything else.
replace educ=99 if educ_c==9

lab def educ 0 "no completed formal education" 1 "ordinary elementary_old system" 2 "higher elementary_old system" ///
3 "junior high/girls high_old system" 4 "vocational_old system" 5 "normal_old system" 6 "higher or vocational_old system/higher normal" ///
7 "university/graduate_old system" 8 "junior high" 9 "high" 10 "College of technology" 11 "2-year college" ///
12 "university" 13 "graduate school" 14 "don't know" 99 "no answer"

lab val educ educ


** 07. Parents' education: Father and/or Mother **

* Both parents share the same category scheme, so the label text is
* written once and reused for the two value labels.
local school_levels 1 "ordinary elementary_old system" 2 "higher elementary_old system" ///
    3 "junior high/girls high_old system" 4 "vocational_old system" 5 "normal_old system" ///
    6 "higher or vocational_old system/higher normal" 7 "university/graduate_old system" ///
    8 "junior high" 9 "high" 10 "College of technology" 11 "2-year college" ///
    12 "university" 13 "graduate school" 14 "don't know" 99 "no answer"

* Father's last school, copied from PPLSTSCH.
generate faeduc = PPLSTSCH
label variable faeduc "father's education"
label define faeduc `school_levels'
label values faeduc faeduc

* Mother's last school, copied from MMLSTSCH.
generate moeduc = MMLSTSCH
label variable moeduc "mother's education"
label define moeduc `school_levels'
label values moeduc moeduc


** 08. Own occupation **

* First and current occupation codes, copied from XXFSTJB and XXJOB;
* both carry the same value label for the special codes.
generate firstocc = XXFSTJB
label variable firstocc "first occupation"
generate occ = XXJOB
label variable occ "current occupation"
label define occ 986 "no occupation" 998 "not applicable" 999 "no answer"
label values firstocc occ
label values occ occ

* Current occupation, copied from XXJOBE.
generate occ_ISCO = XXJOBE
label variable occ_ISCO "current occupation_ISCO"
label define occ_ISCO 99999 "No answer" 88888 "Not applicable"
label values occ_ISCO occ_ISCO

* Ever-worked indicator, copied from XWORK.
generate job = XWORK
label variable job "if the respondent ever had a job"
label define job 1 "yes" 2 "no" 8 "not applicable" 9 "no answer"
label values job job

* Work arrangement of the first job (TPFSTJB) and the current job (TPJOB);
* one value label serves both variables.
generate firstempstat = TPFSTJB
label variable firstempstat "work arrangement of first job"
generate empstat = TPJOB
label variable empstat "employment status/work arrangement of current occupation"

label define empstat 1 "Executive of a company or a corporation" 2 "Regular employee" ///
    3 "Temporary worker, Daily worker, Part-time" 4 "Dispatched worker from temporary personnel" ///
    5 "Self-employed" 6 "Family worker" 7 "Don't know" 8 "Not applicable" 9 "No answer"

label values firstempstat empstat
label values empstat empstat


** 09. Parents' occupation **

* Father's occupation when the respondent was 15, copied from PPJBXX15.
ge faocc_y=PPJBXX15
lab var faocc_y "father's occupation when respondent's 15"
* Fix: label define takes one integer per label and does not accept a
* range such as 986/987, so each code is listed on its own.
lab def faocc_y 986 "no occupation" 987 "no occupation" 998 "not applicable" 999 "no answer"
lab val faocc_y faocc_y

* Father's employment status when the respondent was 15, from PPJBT15S.
ge faempstat_y=PPJBT15S
lab var faempstat_y "father's employment status when respondent's 15"

lab def faempstat_y 1 "Regular employee" 2 "Part-time temporary worker" 3 "Dispatched worker from temporary person" ///
4 "Contract employee" 5 "Non-regular employee-Shokutaku" 6 "dont know" 8 "Not applicable" 9 "No answer" 

lab val faempstat_y faempstat_y 

* Father's job title when the respondent was 15, from PPJBT15P.
ge faemptitle_y=PPJBT15P
lab var faemptitle_y "Job title of father when respondent's 15"

lab def faemptitle_y 1 "No managerial position" 2 "Group leader, foreman" 3 "sub-section head" 4 "section head, manager" ///
5 "department head, general manager" 6 "other managerial position" 7 "dont know" 8 "not applicable" 9 "no answer"

lab val faemptitle_y faemptitle_y

* Size of the father's employer when the respondent was 15, from PPJBSZ15.
ge facopsize_y=PPJBSZ15
lab var facopsize_y "Corporation size of father when respondent's 15"

* Fix: category 4 read "300-399 employees"; the next category starts at
* 1000, so the upper bound is 999.
lab def facopsize_y 1 "one-person firm" 2 "small - 2-29 employees" 3 "medium-sized - 30-299 employees" 4 "large - 300-999 employees" ///
5 "major - 1000 or above" 6 "government agency" 7 "dont know" 8 "not applicable" 9 "no answer"

lab val facopsize_y facopsize_y

* Whether the father is working now, from PPJOB.
ge fawork=PPJOB
la var fawork "if father is working now"
lab def fawork 1 "yes" 2 "no" 8 "not applicable" 9 "no answer"
lab val fawork fawork

* Mother's employment status when the respondent was 15, from MMJBTP15.
ge moempstat_y=MMJBTP15
lab var moempstat_y "mother's employment status when respondent's 15"

lab def moempstat_y 1 "She was not working" 2 "Temporary worker, Daily worker, Part-time worker" ///
3 "Regular employee - non-management position" 4 "Regular employee - managerial position" 5 "Regular employee - professional like nurse & teacher" ///
6 "Regular employee - dont know about occupation" 7 "self-employed/family worker - agricultural" 8 "self-employed/family worker - other" ///
9 "doing piecework at home" 10 "executive of a company or a corporation" 11 "I didnt have a mother at the time" 12 "dont know" 99 "no answer"

lab val moempstat_y moempstat_y

* Whether the mother is working now, from MMJOB.
ge mowork=MMJOB
lab var mowork "if mother is working now"
lab def mowork 1 "yes" 2 "no" 8 "not applicable" 9 "no answer"
lab val mowork mowork


** 10. Tabulate the Identified Variables **

* Open a plain-text log (overwriting any existing file) so that the
* descriptive output below is captured; fill in the placeholder first.
log using /*insert your log file path here*/, replace text

** Data reading and variable selection from raw data
** Japan General Social Survey 2012

** Sex **
tabulate sex

** Age, Birth Year **
summarize age birthyr, detail

** Siblings **
summarize nsibs nbro nsis birthorder, detail

** R's Own Education **
tab1 educ

** Parental Education **
tab1 faeduc moeduc

** R's Own Occupation **
tab1 occ empstat

** Parental Occupation **
tab1 fawork faocc_y faempstat_y mowork moempstat_y

log close

** 11. Keep the identified variables only

* Everything not listed here is dropped from memory.
keep year country pid                                  ///
     sex age birthyr                                   ///
     nbro nsis nsibs birthorder                        ///
     educ faeduc moeduc                                ///
     firstocc firstempstat occ occ_ISCO empstat        ///
     fawork faocc_y faempstat_y mowork moempstat_y


** 12. Save the Data File **

* Fill in the placeholder with the output file; an existing file is overwritten.
saveold /*insert your work directory here*/, replace



** 13. Homogenising education **

* The categorical education variables get a _cat suffix; note that the
* mother's variable changes its prefix from "mo" to "ma".
rename educ   educ_cat
rename faeduc faeduc_cat
rename moeduc maeduc_cat

* Create the new variables up front, in the order they should appear in
* the dataset. All start out missing, except the flag, which is 1 for
* every record.
generate educ_yrs     = .
generate educ_ISCED   = .
generate faeduc_flag  = 1
generate faeduc_yrs   = .
generate maeduc_yrs   = .
generate faeduc_ISCED = .
generate maeduc_ISCED = .

* The same category-to-years and category-to-ISCED mapping applies to the
* respondent, the father and the mother.
* Categories 14 ("don't know") and 99 ("no answer"), and any category not
* listed, are left missing.
foreach who in educ faeduc maeduc {

    * Years of schooling
    replace `who'_yrs = 0  if `who'_cat == 0
    replace `who'_yrs = 6  if `who'_cat == 1
    replace `who'_yrs = 8  if `who'_cat == 2
    replace `who'_yrs = 9  if `who'_cat == 8
    replace `who'_yrs = 10 if `who'_cat == 3
    replace `who'_yrs = 11 if `who'_cat == 4
    replace `who'_yrs = 12 if `who'_cat == 9
    replace `who'_yrs = 14 if inlist(`who'_cat, 5, 6, 10, 11)
    replace `who'_yrs = 16 if inlist(`who'_cat, 7, 12)
    replace `who'_yrs = 20 if `who'_cat == 13

    * ISCED code (020 is stored as the number 20)
    replace `who'_ISCED = 020 if `who'_cat == 0
    replace `who'_ISCED = 100 if inlist(`who'_cat, 1, 2)
    replace `who'_ISCED = 300 if inlist(`who'_cat, 3, 4, 5)
    replace `who'_ISCED = 500 if inlist(`who'_cat, 6, 10, 11)
    replace `who'_ISCED = 600 if `who'_cat == 7
    replace `who'_ISCED = 244 if `who'_cat == 8
    replace `who'_ISCED = 344 if `who'_cat == 9
    replace `who'_ISCED = 665 if `who'_cat == 12
    replace `who'_ISCED = 767 if `who'_cat == 13
}

** Own Education **
label variable educ_yrs   "respondent highest education in years"
label variable educ_ISCED "respondent highest education in ISCED code"

** Parents Education **
label variable faeduc_yrs   "father's education in years"
label variable maeduc_yrs   "mother's education in years"
label variable faeduc_ISCED "father highest education in ISCED code"
label variable maeduc_ISCED "mother highest education in ISCED code"

** 14. Homogenising sibling **

* Cutoff flags: constant 99 ("no cutoff") for the three sibling counts.
ge nbro_flag=99
lab var nbro_flag "cutoff of number of brothers"
ge nsis_flag=99
lab var nsis_flag "cutoff of number of sisters"
ge nsibs_flag=99
lab var nsibs_flag "cutoff of total number of siblings"

lab def nsib_flag 99 "no cutoff"
lab val nbro_flag nsis_flag nsibs_flag nsib_flag

* Recode to system missing the out-of-range codes that were given
* "dont know" / "not applicable" / "no answer" / "missing" value labels
* when the sibling variables were built.
replace nbro=. if nbro==1998
replace nsis=. if nsis==1000 | nsis==1001 | nsis==1998
replace nsibs=. if nsibs==1000 | nsibs==1001 | nsibs==3996
* Fix: birthorder is saved with the other sibling variables and has the
* labelled codes 1000 ("dont know") and 1999 ("no answer"), but was the
* only one left unrecoded, so those codes stayed in as real values.
replace birthorder=. if birthorder==1000 | birthorder==1999

** 15. Tab Education and Sibling Variables **

* One-way frequency tables of the harmonised variables, as a final check.
tab1 sex age birthyr
tab1 educ_cat educ_yrs      ///
     faeduc_cat faeduc_yrs  ///
     maeduc_cat maeduc_yrs  ///
     faeduc_flag
tab1 nbro nsis nsibs        ///
     nbro_flag nsis_flag nsibs_flag


** 16. Save the Data File **

* Fill in the placeholder with the output file; an existing file is overwritten.
saveold /*insert your work directory here*/, replace
